Data analysis workflow for: Correlated evolution of body size and ommatidia size in tiger moths (Lepidoptera: Arctiidae)

Author

Félix P. Leiva

Published

5th Feb 2026

This supplementary file contains the R code for processing and analysing the raw data, and creating figures for the manuscript Correlated evolution of body size and ommatidia size in tiger moths (Lepidoptera: Arctiidae). The manuscript is authored by Michał Rajda, Łukasz Przybyłowicz, Félix P. Leiva and Marcin Czarnoleski.

Load and clean data

# load data and tidy columns and levels in the dataframe
dat <- read_xlsx("../data/Moths_cell_size_data_26_01_2026.xlsx", sheet = "Data") %>%
  rename(
    ommatidia_area     = OmArea,
    forewing_lenght    = For,
    hindwing_lenght    = Hind,
    specimen_id = ID,
    species = Species,
    sex = Sex
  )

Create new columns and recode when is needed

dat <- dat %>%
  mutate(
    log10_ommatidia_area = log10(ommatidia_area),
    log10_forewing_lenght = log10(forewing_lenght),
    log10_hindwing_lenght = if_else(
      is.na(hindwing_lenght),
      NA_real_,
      log10(hindwing_lenght)
    ),
   # We used the forewing–hindwing ratio as a proxy for flight style: values
   # greater than 1 indicate a more active, maneuverable flight (as in
   # hawk moths), whereas values closer to 1 indicate a more gliding flight
   # style. These two flight modes reflect differences in the capacity for
   # sharp turning versus sustained long-distance flight.
    forewing_hindwing_ratio = if_else(
      is.na(hindwing_lenght),
      NA_real_,
      forewing_lenght / hindwing_lenght
    ), # calculate ratio between forewing and hindwing, explicitaly considering NAs
    species = gsub(" ", "_", species), # add underscore (_) between Genus and species (Genus_species)
    species = gsub("\\.", "", species), # Remove dots from species
    phylo = species,
    sex = recode(sex, "M" = "male", "F" = "female"), # Replace M/F by male/female
    Lifestyle = recode(Lifestyle, "Diurnal"= "diurnal", "Nocturnal"= "nocturnal")
  )

Select only columns of interest and apply standard steps

dat <- dat %>%
  select(-ForHind) %>% #  exclude these three columns, not useful at all
  rename_with(tolower) %>%
  rename_with(~ str_replace_all(., "\\s+", "_"))  # Replace double spaces by underscores in column names

Check data structure and transform variables if necessary

glimpse(dat)
Rows: 190
Columns: 14
$ lifestyle               <chr> "diurnal", "diurnal", "diurnal", "diurnal", "d…
$ origin                  <chr> "Poland", "Poland", "Poland", "Poland", "Polan…
$ tribe                   <chr> "Syntomini", "Syntomini", "Syntomini", "Syntom…
$ species                 <chr> "Amata_phegea", "Amata_phegea", "Amata_phegea"…
$ specimen_id             <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,…
$ sex                     <chr> "female", "female", "female", "male", "male", …
$ forewing_lenght         <dbl> 17, 18, 17, 17, 19, 20, 16, 15, 14, 15, 15, 15…
$ hindwing_lenght         <dbl> 8.4, 8.3, 8.0, 8.5, 9.7, 10.1, 13.2, 12.9, 12.…
$ ommatidia_area          <dbl> 453, 494, 490, 641, 594, 627, 359, 345, 350, 3…
$ log10_ommatidia_area    <dbl> 2.7, 2.7, 2.7, 2.8, 2.8, 2.8, 2.6, 2.5, 2.5, 2…
$ log10_forewing_lenght   <dbl> 1.2, 1.3, 1.2, 1.2, 1.3, 1.3, 1.2, 1.2, 1.2, 1…
$ log10_hindwing_lenght   <dbl> 0.92, 0.92, 0.90, 0.93, 0.99, 1.00, 1.12, 1.11…
$ forewing_hindwing_ratio <dbl> 2.1, 2.2, 2.2, 2.0, 2.0, 1.9, 1.2, 1.2, 1.2, 1…
$ phylo                   <chr> "Amata_phegea", "Amata_phegea", "Amata_phegea"…

Variables to convert to factor

columns_to_factor <- c(
  "lifestyle", 
  "origin", 
  "tribe", 
  "species",
  "specimen_id",   
  "sex",
  "phylo"
)

Variables to convert to numeric

columns_to_numeric <- c(
"forewing_lenght",
"hindwing_lenght",
"ommatidia_area",
"log10_ommatidia_area",
"log10_forewing_lenght",
"log10_hindwing_lenght",
"forewing_hindwing_ratio"
)

Apply transformation

# Apply the conversions to the dataset.
dat <- dat %>%
  mutate(
    across(all_of(columns_to_factor), as.factor),
    across(all_of(columns_to_numeric), as.numeric)
  )

# Confirm that changes have been applied correctly
glimpse(dat)
Rows: 190
Columns: 14
$ lifestyle               <fct> diurnal, diurnal, diurnal, diurnal, diurnal, d…
$ origin                  <fct> Poland, Poland, Poland, Poland, Poland, Poland…
$ tribe                   <fct> Syntomini, Syntomini, Syntomini, Syntomini, Sy…
$ species                 <fct> Amata_phegea, Amata_phegea, Amata_phegea, Amat…
$ specimen_id             <fct> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,…
$ sex                     <fct> female, female, female, male, male, male, fema…
$ forewing_lenght         <dbl> 17, 18, 17, 17, 19, 20, 16, 15, 14, 15, 15, 15…
$ hindwing_lenght         <dbl> 8.4, 8.3, 8.0, 8.5, 9.7, 10.1, 13.2, 12.9, 12.…
$ ommatidia_area          <dbl> 453, 494, 490, 641, 594, 627, 359, 345, 350, 3…
$ log10_ommatidia_area    <dbl> 2.7, 2.7, 2.7, 2.8, 2.8, 2.8, 2.6, 2.5, 2.5, 2…
$ log10_forewing_lenght   <dbl> 1.2, 1.3, 1.2, 1.2, 1.3, 1.3, 1.2, 1.2, 1.2, 1…
$ log10_hindwing_lenght   <dbl> 0.92, 0.92, 0.90, 0.93, 0.99, 1.00, 1.12, 1.11…
$ forewing_hindwing_ratio <dbl> 2.1, 2.2, 2.2, 2.0, 2.0, 1.9, 1.2, 1.2, 1.2, 1…
$ phylo                   <fct> Amata_phegea, Amata_phegea, Amata_phegea, Amat…

Calculate mean per species and sex

dat_mean <- dat %>%
  group_by(species, sex) %>%
  summarise(across(c(ommatidia_area, forewing_lenght, hindwing_lenght, forewing_hindwing_ratio),
                   \(x) mean(x, na.rm = TRUE)),
            .groups = "drop") %>%
  mutate(across(c(ommatidia_area, forewing_lenght, hindwing_lenght),
                ~ log10(.), .names = "log10_{col}")) %>%
  left_join(distinct(dat, lifestyle, origin, tribe, species, sex, phylo), by = c("species", "sex")) %>%
  select(lifestyle, origin, tribe, species, sex, phylo, everything())

Phylogeny

What we will see next is the phylogenetic tree constructed by Łukasz Przybyłowicz, one of the co-authors of the manuscript, which includes 31 moth species.

Because we aim to account for the evolutionary history of these species, we need to generate a phylogenetic tree in a format such as .tree or .nwk. This will later allow us to import it into R to calculate the variance–covariance matrix and the corresponding correlation needed for the statistical models.

The following code first defines a Newick-formatted string (species_moths) representing the evolutionary relationships among 31 species, as shown in the previous Figure. This string is read using the read.tree() function (from the ape package) and a phylo object, which can be plotted to display the same relationships as in the previous figure. The tree is also exported and saved in the output folder.

Generate topology

Here, what I do is simply copy and paste the species names and use parentheses. For more details about the Newick formatting, I suggest to check at The Newick tree format.

species_moths <- 
"((((((Eilema_complana,Eilema_sororcula),Lithosia_quadra),Atolmis_rubricollis),Pelosia_muscerda),Miltochrista_miniata),(Amerila_luteibarba,
((Balacra_caeruleifascia,(Pseudothyretes_kamitugensis,
(Amata_phegea,Amata_sp,Amata_nigriceps))),
((((Callimorpha_dominula,Euplagia_quadripunctaria),(Thyria_jacobaeae,Spiris_striata)),
((Spilosoma_urticae,Phragmatobia_fuliginosa),(Diacrisia_sannio,
(Pericallia_matronula,(Arctia_caja,Arctia_villica))))),
(Dysschema_sacrifica,
((Euchromia_lethe,Cosmosoma_sp,Horama_panthalon,Syntomeida_epilais),
((Cyanopepla_jucunda,Macrocneme_leucostigma,Ctenucha_virginica,Dahana_atripennis)
 )))))));"

# Read tree from Newick string
tree_moths <- read.tree(text = species_moths)

# Get tip labels (species)
tree_moths$tip.label
 [1] "Eilema_complana"             "Eilema_sororcula"           
 [3] "Lithosia_quadra"             "Atolmis_rubricollis"        
 [5] "Pelosia_muscerda"            "Miltochrista_miniata"       
 [7] "Amerila_luteibarba"          "Balacra_caeruleifascia"     
 [9] "Pseudothyretes_kamitugensis" "Amata_phegea"               
[11] "Amata_sp"                    "Amata_nigriceps"            
[13] "Callimorpha_dominula"        "Euplagia_quadripunctaria"   
[15] "Thyria_jacobaeae"            "Spiris_striata"             
[17] "Spilosoma_urticae"           "Phragmatobia_fuliginosa"    
[19] "Diacrisia_sannio"            "Pericallia_matronula"       
[21] "Arctia_caja"                 "Arctia_villica"             
[23] "Dysschema_sacrifica"         "Euchromia_lethe"            
[25] "Cosmosoma_sp"                "Horama_panthalon"           
[27] "Syntomeida_epilais"          "Cyanopepla_jucunda"         
[29] "Macrocneme_leucostigma"      "Ctenucha_virginica"         
[31] "Dahana_atripennis"          

Plot the tree

plotTree(tree_moths, ftype = "i", lwd = 2)

Export the tree

write.tree(tree_moths, "../outputs/tree_moths_updated.tre")

Merge phylogeny and data

The next step is to check whether all the species in our dataframe are included in the tree and vice versa. To do this, we will compare the species names listed in the “phylo” column with the tip labels of the tree.

setdiff(tree_moths$tip.label, dat_mean$phylo)      # Tree species absent in database
[1] "Spilosoma_urticae" "Diacrisia_sannio" 
setdiff(dat_mean$phylo, tree_moths$tip.label)      # Database species absent in tree
[1] "Diacrisia_sanio"  "Spilosoma_urtice"

What happens here is that there are two species included in the phylogenetic tree whose names are not written correctly. What I’ll do is check in the [Catalogue of Life](https://www.catalogueoflife.org/) whether these species names are properly spelled. If you try it yourself, you’ll see that the names in the dataframe contain minor misspellings - nothing serious, but we need to correct these typos to ensure that all species in the dataframe are included in the phylogenetic tree. The two species in question are Diacrisia_sanio and Spilosoma_urtice, which should actually be Diacrisia_sannio and Spilosoma_urticae, respectively.

# Correct the misspelled species names in the dataframe column
dat_mean$species <- gsub("Diacrisia_sanio", "Diacrisia_sannio", dat_mean$species)
dat_mean$species <- gsub("Spilosoma_urtice", "Spilosoma_urticae", dat_mean$species)

dat_mean$phylo <- gsub("Diacrisia_sanio", "Diacrisia_sannio", dat_mean$phylo)
dat_mean$phylo <- gsub("Spilosoma_urtice", "Spilosoma_urticae", dat_mean$phylo)
# Re-check for mismatches after pruning
setdiff(tree_moths$tip.label, dat_mean$phylo)
character(0)
setdiff(dat_mean$phylo, tree_moths$tip.label)
character(0)

Perfect!!! Now all species in the dataframe are included in the tree and viceversa

Compute branch length using Grafen

tree <- compute.brlen(tree_moths, method = "Grafen")

Compute of phylogenetic correlation matrix

Ojo acá! Note that the vignette by Paul Bürkner does not specify the argument corr = TRUE. For the purposes of our analysis, however, it is necessary to include this argument in order to compute the correlation between species, given their evolutionary history as represented by the phylogeny.

This allows us to see that the values range from 0 to 1. Values close to 1 indicate a high correlation between species, meaning that species are expected to share similar values for the trait of interest.

A <- ape::vcv.phylo(tree, corr = TRUE)


rownames(A) <- colnames(A) <- levels(dat$phylo)
A <- A[levels(dat$phylo), levels(dat$phylo)]

Figure 1

Prepare data

dat_for_figure_1 <- dat_mean %>%
  select(1:3,6)

species_counts <- dat_for_figure_1 %>%
  group_by(phylo) %>%
  summarise(n_unique_combinations = n_distinct(lifestyle, origin, tribe), .groups = "drop")

species_single_attributes <- species_counts %>%
  filter(n_unique_combinations == 1) %>%
  pull(phylo)

dat_for_figure_1 <- dat_for_figure_1 %>%
  filter(phylo %in% species_single_attributes) %>%
  distinct(phylo, .keep_all = TRUE)

summary_data <- dat_for_figure_1 %>%
  filter(phylo %in% tree_moths$tip.label)

Create matrices with rownames matching tree tip labels

dat_moths_lifestyle <- summary_data %>%
  column_to_rownames("phylo") %>%
  select(lifestyle) %>%
  as.matrix() %>%
  .[tree_moths$tip.label, , drop = FALSE]

dat_moths_origin <- summary_data %>%
  column_to_rownames("phylo") %>%
  select(origin) %>%
  as.matrix() %>%
  .[tree_moths$tip.label, , drop = FALSE]

dat_moths_tribe <- summary_data %>%
  column_to_rownames("phylo") %>%
  select(tribe) %>%
  as.matrix() %>%
  .[tree_moths$tip.label, , drop = FALSE]

Make Figure

## Function to abbreviate species names (Genus_species  is then G. species)

format_species <- function(x) {

  parts <- strsplit(x, "_")[[1]]

  # If no second term, return unchanged
  if (length(parts) < 2) {
    return(x)
  }

  genus   <- parts[1]
  species <- parts[2]

  # sp, sp1, sp. while keep genus and remove underscore
  if (grepl("^sp", species, ignore.case = TRUE)) {
    return(paste(genus, species))
  }

  # Valid species... abbreviate genus
  paste0(substr(genus, 1, 1), ". ", species)
}


# Colores lifestyle
lifestyle_colours <- c(
  "diurnal"   = "#FFEFD5",
  "nocturnal" = "gray20"
)

## Base tree. Circular (personal prefercne)
circ <- ggtree(tree_moths, layout = "fan", open.angle = 8) %>%
  rotate_tree(90) +
  geom_tiplab2(
    aes(label = vapply(label, format_species, character(1))),
    size = 2,
    align = TRUE,
    offset = 0
  )

# Heatmap Lifestyle
p1 <- gheatmap(
  circ,
  dat_moths_lifestyle,
  width = 0.1,
  offset = 3.4,
  colnames_angle = 0,
  colnames_offset_x = -0.1, 
  colnames_offset_y = 0.4,
  hjust = 0,
  font.size = 3.2
) +
  scale_fill_manual(
    values = lifestyle_colours,
    name = "Lifestyle"
  )

# Heatmap Origin
p2 <- p1 + new_scale_fill()

p2 <- gheatmap(
  p2,
  dat_moths_origin,
  width = 0.1,
  offset = 4.4,
  colnames_angle = -0.2,
  colnames_offset_x = 0, 
  colnames_offset_y = 0.3,
  hjust = 0,
  font.size = 3
) +
  # I just wanted to changed the end color
  scale_fill_viridis_d(
  option = "plasma",
  begin = 0.1,       
  end = 0.9,            
  name = "Origin"
)

# Heatmap 3: Tribe
p3 <- p2 + new_scale_fill()

Figure_1 <- gheatmap(
  p3,
  dat_moths_tribe,
  width = 0.1,
  offset = 5.4,
  colnames_angle = 0,
  colnames_offset_x = 0.1, 
  colnames_offset_y = 0.25,
  hjust = 0,
  font.size = 3
  ) +
  scale_fill_viridis_d(option = "D", name = "Tribe") +
  theme(
    legend.position = "right",      
    legend.justification = "left",     
    legend.margin = margin(5, 5, 5, 5), 
    plot.margin = margin(0, 0, 0, 5)    
  )

Figure_1

# Export figure
ggsave(
  "../outputs/Figure_1.pdf",
  Figure_1,
  width = 9,
  height = 9,
  units = "in"
)

Does larger-bodied moths evolved larger ommatidia?

Only body size

fit_1 <- brm(
  log10_ommatidia_area ~ log10_forewing_lenght +
    (1 | gr(phylo, cov = A)) +   # phylogeny
    (1 | species),               # species-specific effects (beyond phylogeny)
  data = dat,
  family = gaussian(),
  data2 = list(A = A),
  prior = c(
    prior(normal(0,10), "b"),
    prior(normal(0,50), "Intercept"),
    prior(student_t(3,0,20), "sd"),
    prior(student_t(3,0,20), "sigma")
  ),
  seed = 6955,
  sample_prior = TRUE, 
  chains = 4, 
  cores = 4, 
  threads = threading(2),
  iter = 4000, 
  warmup = 2000,
  control = list(max_treedepth = 15,
                 adapt_delta = 0.95),
  save_pars = save_pars(all = TRUE),
  silent = TRUE, refresh = 0
)

Additive effect of sex

fit_2 <- brm(
  log10_ommatidia_area ~ log10_forewing_lenght + 
    sex +
    (1 | gr(phylo, cov = A)) +
    (1 | species),
  data = dat,
  family = gaussian(),
  data2 = list(A = A),
  prior = c(
    prior(normal(0,10), "b"),
    prior(normal(0,50), "Intercept"),
    prior(student_t(3,0,20), "sd"),
    prior(student_t(3,0,20), "sigma")
  ),
  seed = 6955,
  sample_prior = TRUE, 
  chains = 4, 
  cores = 4, 
  threads = threading(2),
  iter = 4000, 
  warmup = 2000,
  control = list(max_treedepth = 15,
                 adapt_delta = 0.95),
  save_pars = save_pars(all = TRUE),
  silent = TRUE, refresh = 0
)

Interactive effect of sex

fit_3 <- brm(
  log10_ommatidia_area ~ log10_forewing_lenght * 
    sex +
    (1 | gr(phylo, cov = A)) +
    (1 | species),
  data = dat,
  family = gaussian(),
  data2 = list(A = A),
  prior = c(
    prior(normal(0,10), "b"),
    prior(normal(0,50), "Intercept"),
    prior(student_t(3,0,20), "sd"),
    prior(student_t(3,0,20), "sigma")
  ),
  seed = 6955,
  sample_prior = TRUE, 
  chains = 4, 
  cores = 4, 
  threads = threading(2),
  iter = 4000, 
  warmup = 2000,
  control = list(max_treedepth = 15,
                 adapt_delta = 0.95),
  save_pars = save_pars(all = TRUE),
  silent = TRUE, refresh = 0
)
Running MCMC with 4 parallel chains, with 2 thread(s) per chain...

Chain 3 finished in 9.5 seconds.
Chain 1 finished in 9.7 seconds.
Chain 4 finished in 9.6 seconds.
Chain 2 finished in 9.9 seconds.

All 4 chains finished successfully.
Mean chain execution time: 9.7 seconds.
Total execution time: 10.0 seconds.

Best model

model_1 = add_criterion(fit_1, 'loo')
model_2 = add_criterion(fit_2, 'loo')
model_3 = add_criterion(fit_3, 'loo')
loo_compare(model_1,model_2,model_3)
        elpd_diff se_diff
model_3   0.0       0.0  
model_2  -4.0       3.1  
model_1 -54.4       7.3  

In this comparison, the model fit_3 which accounts for the interactive effects of sex and log10_forewing_length, is used as the reference with an elpd_diff of 0. In contrast, the model including only additive effects, fit_2, has an expected log predictive density (ELPD) approximately 4.0 units lower, with an associated standard error of 3.1. Within the framework of leave‑one‑out cross‑validation, differences that exceed roughly four times their standard error are typically considered strong evidence. Here, 4/3.1 ≈ 1.3, indicating that both models perform similarly well. However, the model fit_1, that excludes sex,can be disregarded, as it does not provide a good fit. Note this ones has 54.4/7.3 ≈ 7.5, so poor support.

How does lifestyle affect whether larger-bodied moths evolved larger ommatidia?

Additive effect of lifestyle

fit_4 <- brm(
  log10_ommatidia_area ~ log10_forewing_lenght + 
    lifestyle +
    (1 | gr(phylo, cov = A)) +
    (1 | species),
  data = dat,
  family = gaussian(),
  data2 = list(A = A),
  prior = c(
    prior(normal(0,10), "b"),
    prior(normal(0,50), "Intercept"),
    prior(student_t(3,0,20), "sd"),
    prior(student_t(3,0,20), "sigma")
  ),
  seed = 6955,
  sample_prior = TRUE, 
  chains = 4, 
  cores = 4, 
  threads = threading(2),
  iter = 4000, 
  warmup = 2000,
  control = list(max_treedepth = 15,
                 adapt_delta = 0.95),
  save_pars = save_pars(all = TRUE),
  silent = TRUE, refresh = 0
)

Interactive effect of lifestyle

fit_5 <- brm(
  log10_ommatidia_area ~ log10_forewing_lenght *
    lifestyle +
    (1 | gr(phylo, cov = A)) +
    (1 | species),
  data = dat,
  family = gaussian(),
  data2 = list(A = A),
  prior = c(
    prior(normal(0,10), "b"),
    prior(normal(0,50), "Intercept"),
    prior(student_t(3,0,20), "sd"),
    prior(student_t(3,0,20), "sigma")
  ),
  seed = 6955,
  sample_prior = TRUE, 
  chains = 4, 
  cores = 4, 
  threads = threading(2),
  iter = 4000, 
  warmup = 2000,
  control = list(max_treedepth = 15,
                 adapt_delta = 0.95),
  save_pars = save_pars(all = TRUE),
  silent = TRUE, refresh = 0
)

Additive effect of lifestyle plus sex

fit_6 <- brm(
  log10_ommatidia_area ~ log10_forewing_lenght +
    lifestyle +
    sex +
    (1 | gr(phylo, cov = A)) +
    (1 | species),
  data = dat,
  family = gaussian(),
  data2 = list(A = A),
  prior = c(
    prior(normal(0,10), "b"),
    prior(normal(0,50), "Intercept"),
    prior(student_t(3,0,20), "sd"),
    prior(student_t(3,0,20), "sigma")
  ),
  seed = 6955,
  sample_prior = TRUE, 
  chains = 4, 
  cores = 4, 
  threads = threading(2),
  iter = 4000, 
  warmup = 2000,
  control = list(max_treedepth = 15,
                 adapt_delta = 0.95),
  save_pars = save_pars(all = TRUE),
  silent = TRUE, refresh = 0
)

Interactive effect of lifestyle plus sex

fit_7 <- brm(
  log10_ommatidia_area ~ log10_forewing_lenght *
    lifestyle +
    sex +
    (1 | gr(phylo, cov = A)) +
    (1 | species),
  data = dat,
  family = gaussian(),
  data2 = list(A = A),
  prior = c(
    prior(normal(0,10), "b"),
    prior(normal(0,50), "Intercept"),
    prior(student_t(3,0,20), "sd"),
    prior(student_t(3,0,20), "sigma")
  ),
  seed = 6955,
  sample_prior = TRUE, 
  chains = 4, 
  cores = 4, 
  threads = threading(2),
  iter = 4000, 
  warmup = 2000,
  control = list(max_treedepth = 15,
                 adapt_delta = 0.95),
  save_pars = save_pars(all = TRUE),
  silent = TRUE, refresh = 0
)

Interactive effect of lifestyle and sex

fit_8 <- brm(
  log10_ommatidia_area ~ log10_forewing_lenght *
    lifestyle *
    sex +
    (1 | gr(phylo, cov = A)) +
    (1 | species),
  data = dat,
  family = gaussian(),
  data2 = list(A = A),
  prior = c(
    prior(normal(0,10), "b"),
    prior(normal(0,50), "Intercept"),
    prior(student_t(3,0,20), "sd"),
    prior(student_t(3,0,20), "sigma")
  ),
  seed = 6955,
  sample_prior = TRUE, 
  chains = 4, 
  cores = 4, 
  threads = threading(2),
  iter = 4000, 
  warmup = 2000,
  control = list(max_treedepth = 15,
                 adapt_delta = 0.95),
  save_pars = save_pars(all = TRUE),
  silent = TRUE, refresh = 0
)

Table 1

Figure 2

# Create new dataset for predictions
newdata <- dat %>%
  data_grid(
    log10_forewing_lenght = seq(
      min(log10_forewing_lenght),
      max(log10_forewing_lenght),
      length.out = 100
    ),
    sex = c("female", "male"),
    lifestyle
  )

# Posterior expected values (population-level only)
predicted_draws <- add_epred_draws(
  fit_8,
  newdata = newdata,
  re_formula = NA,
  ndraws = 500
)

# Add Unicode symbols for sex
sex_symbols <- bind_rows(
  tibble(
    sex = c("female", "male"),
    symbol = c("\u2640", "\u2642"),
    x = c(1.61, 1.61),
    y = c(2.63, 2.82),
    size = c(7, 7),
    lifestyle = "diurnal"
  ),
  tibble(
    sex = c("female", "male"),
    symbol = c("\u2640", "\u2642"),
    x = c(1.61, 1.61),
    y = c(2.63, 2.75),
    size = c(7, 7),
    lifestyle = "nocturnal"
  )
)

Figure_2 <-
  ggplot(predicted_draws, aes(x = log10_forewing_lenght, y = .epred,
      group = interaction(sex, lifestyle, .draw),colour = sex)) +
  geom_line(alpha = 0.08,linewidth = 0.3) + 
  geom_point(data = dat, aes(x = log10_forewing_lenght, y = log10_ommatidia_area,
      colour = sex), inherit.aes = FALSE, size = 1.6,
    alpha = 0.6) +
  geom_text(data = sex_symbols,aes(x = x, y = y, label = symbol, colour = sex),
            inherit.aes = FALSE, size = 7, fontface = "bold") +
  facet_wrap(~ lifestyle) +
  labs(
    x = expression(log[10]~"forewing length (mm)"),
    y = expression(log[10]~ommatidia~area~(mu*m^2)),
    colour = NULL) +
  scale_colour_manual(
    values = c(
      "female" = "#9370DB",
      "male"   = "#008B8B")) +
  scale_x_continuous(limits = c(1, 1.7), expand = c(0, 0)) +
  scale_y_continuous(limits = c(2.3, 3.1), expand = c(0, 0)) +
theme_bw() +
  theme(
    legend.position = "none", 
    axis.text       = element_text(size = 13),
    axis.title      = element_text(size = 15),
    strip.text      = element_text(size = 14),
    panel.border    = element_rect(
      colour = "black",
      fill   = NA,
      linewidth = 1
    )
  )

Figure_2

# Export Figure
ggsave("../outputs/Figure_2.png", Figure_2, width = 10, height = 5, units = "in")

Does forewing length differ between sexes and lifestyles?

Only sex

fit_forewing_length_1 <- brm(
  log10_forewing_lenght ~  sex +
    (1 | gr(phylo, cov = A)) +
    (1 | species),
  data = dat,
  family = gaussian(),
  data2 = list(A = A),
  prior = c(
    prior(normal(0,10), "b"),
    prior(normal(0,50), "Intercept"),
    prior(student_t(3,0,20), "sd"),
    prior(student_t(3,0,20), "sigma")
  ),
  seed = 6955,
  sample_prior = TRUE, 
  chains = 4, 
  cores = 4, 
  threads = threading(2),
  iter = 4000, 
  warmup = 2000,
  control = list(max_treedepth = 15,
                 adapt_delta = 0.95),
  save_pars = save_pars(all = TRUE),
  silent = TRUE, refresh = 0
)

Only lifestyle

fit_forewing_length_2 <- brm(
  log10_forewing_lenght ~  lifestyle +
    (1 | gr(phylo, cov = A)) +
    (1 | species),
  data = dat,
  family = gaussian(),
  data2 = list(A = A),
  prior = c(
    prior(normal(0,10), "b"),
    prior(normal(0,50), "Intercept"),
    prior(student_t(3,0,20), "sd"),
    prior(student_t(3,0,20), "sigma")
  ),
  seed = 6955,
  sample_prior = TRUE, 
  chains = 4, 
  cores = 4, 
  threads = threading(2),
  iter = 4000, 
  warmup = 2000,
  control = list(max_treedepth = 15,
                 adapt_delta = 0.95),
  save_pars = save_pars(all = TRUE),
  silent = TRUE, refresh = 0
)

Additive effects of sex and lifestyle

fit_forewing_length_3 <- brm(
  log10_forewing_lenght ~  sex + lifestyle +
    (1 | gr(phylo, cov = A)) +
    (1 | species),
  data = dat,
  family = gaussian(),
  data2 = list(A = A),
  prior = c(
    prior(normal(0,10), "b"),
    prior(normal(0,50), "Intercept"),
    prior(student_t(3,0,20), "sd"),
    prior(student_t(3,0,20), "sigma")
  ),
  seed = 6955,
  sample_prior = TRUE, 
  chains = 4, 
  cores = 4, 
  threads = threading(2),
  iter = 4000, 
  warmup = 2000,
  control = list(max_treedepth = 15,
                 adapt_delta = 0.95),
  save_pars = save_pars(all = TRUE),
  silent = TRUE, refresh = 0
)

Interactive effects of sex and life style

fit_forewing_length_4 <- brm(
  log10_forewing_lenght ~  sex * lifestyle +
    (1 | gr(phylo, cov = A)) +
    (1 | species),
  data = dat,
  family = gaussian(),
  data2 = list(A = A),
  prior = c(
    prior(normal(0,10), "b"),
    prior(normal(0,50), "Intercept"),
    prior(student_t(3,0,20), "sd"),
    prior(student_t(3,0,20), "sigma")
  ),
  seed = 6955,
  sample_prior = TRUE, 
  chains = 4, 
  cores = 4, 
  threads = threading(2),
  iter = 4000, 
  warmup = 2000,
  control = list(max_treedepth = 15,
                 adapt_delta = 0.95),
  save_pars = save_pars(all = TRUE),
  silent = TRUE, refresh = 0
)

Model comparison

Running MCMC with 4 sequential chains, with 2 thread(s) per chain...

Chain 1 finished in 7.3 seconds.
Chain 2 finished in 5.1 seconds.
Chain 3 finished in 7.5 seconds.
Chain 4 finished in 7.3 seconds.

All 4 chains finished successfully.
Mean chain execution time: 6.8 seconds.
Total execution time: 27.7 seconds.

Running MCMC with 4 sequential chains, with 2 thread(s) per chain...

Chain 1 finished in 7.0 seconds.
Chain 2 finished in 9.2 seconds.
Chain 3 finished in 8.1 seconds.
Chain 4 finished in 6.8 seconds.

All 4 chains finished successfully.
Mean chain execution time: 7.8 seconds.
Total execution time: 31.5 seconds.

Does ommatidia area differ between sexes and lifestyles?

Only sex

fit_ommatidia_area_1 <- brm(
  log10_ommatidia_area ~  sex +
    (1 | gr(phylo, cov = A)) +
    (1 | species),
  data = dat,
  family = gaussian(),
  data2 = list(A = A),
  prior = c(
    prior(normal(0,10), "b"),
    prior(normal(0,50), "Intercept"),
    prior(student_t(3,0,20), "sd"),
    prior(student_t(3,0,20), "sigma")
  ),
  seed = 6955,
  sample_prior = TRUE, 
  chains = 4, 
  cores = 4, 
  threads = threading(2),
  iter = 4000, 
  warmup = 2000,
  control = list(max_treedepth = 15,
                 adapt_delta = 0.95),
  save_pars = save_pars(all = TRUE),
  silent = TRUE, refresh = 0
)

Only lifestyle

fit_ommatidia_area_2 <- brm(
  log10_ommatidia_area ~  lifestyle +
    (1 | gr(phylo, cov = A)) +
    (1 | species),
  data = dat,
  family = gaussian(),
  data2 = list(A = A),
  prior = c(
    prior(normal(0,10), "b"),
    prior(normal(0,50), "Intercept"),
    prior(student_t(3,0,20), "sd"),
    prior(student_t(3,0,20), "sigma")
  ),
  seed = 6955,
  sample_prior = TRUE, 
  chains = 4, 
  cores = 4, 
  threads = threading(2),
  iter = 4000, 
  warmup = 2000,
  control = list(max_treedepth = 15,
                 adapt_delta = 0.95),
  save_pars = save_pars(all = TRUE),
  silent = TRUE, refresh = 0
)

Additive effects of sex and lifestyle

fit_ommatidia_area_3 <- brm(
  log10_ommatidia_area ~  sex + lifestyle +
    (1 | gr(phylo, cov = A)) +
    (1 | species),
  data = dat,
  family = gaussian(),
  data2 = list(A = A),
  prior = c(
    prior(normal(0,10), "b"),
    prior(normal(0,50), "Intercept"),
    prior(student_t(3,0,20), "sd"),
    prior(student_t(3,0,20), "sigma")
  ),
  seed = 6955,
  sample_prior = TRUE, 
  chains = 4, 
  cores = 4, 
  threads = threading(2),
  iter = 4000, 
  warmup = 2000,
  control = list(max_treedepth = 15,
                 adapt_delta = 0.95),
  save_pars = save_pars(all = TRUE),
  silent = TRUE, refresh = 0
)

Interactive effects of sex and life style

fit_ommatidia_area_4 <- brm(
  log10_ommatidia_area ~  sex * lifestyle +
    (1 | gr(phylo, cov = A)) +
    (1 | species),
  data = dat,
  family = gaussian(),
  data2 = list(A = A),
  prior = c(
    prior(normal(0,10), "b"),
    prior(normal(0,50), "Intercept"),
    prior(student_t(3,0,20), "sd"),
    prior(student_t(3,0,20), "sigma")
  ),
  seed = 6955,
  sample_prior = TRUE, 
  chains = 4, 
  cores = 4, 
  threads = threading(2),
  iter = 4000, 
  warmup = 2000,
  control = list(max_treedepth = 15,
                 adapt_delta = 0.95),
  save_pars = save_pars(all = TRUE),
  silent = TRUE, refresh = 0
)

Model comparison

Does forewing hindwing ratio differ between sexes and lifestyles?

Only sex

fit_forewing_hindwing_ratio_1 <- brm(
  forewing_hindwing_ratio ~  sex +
    (1 | gr(phylo, cov = A)) +
    (1 | species),
  data = dat,
  family = gaussian(),
  data2 = list(A = A),
  prior = c(
    prior(normal(0,10), "b"),
    prior(normal(0,50), "Intercept"),
    prior(student_t(3,0,20), "sd"),
    prior(student_t(3,0,20), "sigma")
  ),
  seed = 6955,
  sample_prior = TRUE, 
  chains = 4, 
  cores = 4, 
  threads = threading(2),
  iter = 4000, 
  warmup = 2000,
  control = list(max_treedepth = 15,
                 adapt_delta = 0.95),
  save_pars = save_pars(all = TRUE),
  silent = TRUE, refresh = 0
)

Only lifestyle

fit_forewing_hindwing_ratio_2 <- brm(
  forewing_hindwing_ratio ~  lifestyle +
    (1 | gr(phylo, cov = A)) +
    (1 | species),
  data = dat,
  family = gaussian(),
  data2 = list(A = A),
  prior = c(
    prior(normal(0,10), "b"),
    prior(normal(0,50), "Intercept"),
    prior(student_t(3,0,20), "sd"),
    prior(student_t(3,0,20), "sigma")
  ),
  seed = 6955,
  sample_prior = TRUE, 
  chains = 4, 
  cores = 4, 
  threads = threading(2),
  iter = 4000, 
  warmup = 2000,
  control = list(max_treedepth = 15,
                 adapt_delta = 0.95),
  save_pars = save_pars(all = TRUE),
  silent = TRUE, refresh = 0
)

Additive effects of sex and lifestyle

fit_forewing_hindwing_ratio_3 <- brm(
  forewing_hindwing_ratio ~  sex + lifestyle +
    (1 | gr(phylo, cov = A)) +
    (1 | species),
  data = dat,
  family = gaussian(),
  data2 = list(A = A),
  prior = c(
    prior(normal(0,10), "b"),
    prior(normal(0,50), "Intercept"),
    prior(student_t(3,0,20), "sd"),
    prior(student_t(3,0,20), "sigma")
  ),
  seed = 6955,
  sample_prior = TRUE, 
  chains = 4, 
  cores = 4, 
  threads = threading(2),
  iter = 4000, 
  warmup = 2000,
  control = list(max_treedepth = 15,
                 adapt_delta = 0.95),
  save_pars = save_pars(all = TRUE),
  silent = TRUE, refresh = 0
)

Interactive effects of sex and life style

fit_forewing_hindwing_ratio_4 <- brm(
  forewing_hindwing_ratio ~  sex * lifestyle +
    (1 | gr(phylo, cov = A)) +
    (1 | species),
  data = dat,
  family = gaussian(),
  data2 = list(A = A),
  prior = c(
    prior(normal(0,10), "b"),
    prior(normal(0,50), "Intercept"),
    prior(student_t(3,0,20), "sd"),
    prior(student_t(3,0,20), "sigma")
  ),
  seed = 6955,
  sample_prior = TRUE, 
  chains = 4, 
  cores = 4, 
  threads = threading(2),
  iter = 4000, 
  warmup = 2000,
  control = list(max_treedepth = 15,
                 adapt_delta = 0.95),
  save_pars = save_pars(all = TRUE),
  silent = TRUE, refresh = 0
)

Model comparison

Figure 3

# Define colors for each sex (these match the ones from Figure 2)
sex_colours <- c(
  "female" = "#9370DB",
  "male"   = "#008B8B"
)

# Define colors for lifestyle
lifestyle_colours <- c(
  "diurnal"   = "#FFEFD5",
  "nocturnal" = "gray20"
)

# Function to compute grand means and plot
make_plot <- function(fit, group_var, colours, xlab) {

  newdat <- expand.grid(
    sex = c("female", "male"),
    lifestyle = c("diurnal", "nocturnal"),
    species = NA,
    phylo = NA
  )
# calculate posterior draws with accounting varyong effects on intercepts (species and phylo)
  ep <- posterior_epred(
    fit,
    newdata = newdat,
    re_formula = NA # main interest rely on fixed effects.
  )

  # here we calculate the margilalised effects by sex or lifesatyle
  gm <- as_tibble(ep) |>
    mutate(draw = row_number()) |>
    pivot_longer(-draw, names_to = "col", values_to = "value") |>
    mutate(
      col = as.integer(sub("V", "", col)),
      group = newdat[[group_var]][col]
    ) |>
    group_by(draw, group) |>
    summarise(grand_mean = mean(value), .groups = "drop") |>
    mutate(group = factor(group, levels = names(colours)))

  ggplot(gm, aes(x = grand_mean, y = group, fill = group)) +
    geom_density_ridges(scale = 3, alpha = 0.6, color = "white") +
    stat_halfeye(
      scale = 0.8,
      justification = 0,
      slab_alpha = 0,
      interval_alpha = 1
    ) +
    scale_fill_manual(values = colours) +
    labs(x = xlab, y = NULL, fill = NULL) +
    theme_bw(base_size = 12) +
    theme(
      axis.text.y = element_blank(),      #remove axis
      axis.ticks.y = element_blank(),
      legend.position = "top",            
      legend.background = element_blank(),
      legend.key = element_blank(),
      legend.text = element_text(size = 12)
    )
}

# Build final figure (2 columnas and 3 filas)
Figure_3 <-
  (make_plot(fit_forewing_length_4, "sex", sex_colours, "Forewing length") |
     make_plot(fit_forewing_length_4, "lifestyle", lifestyle_colours, "Forewing length")) /
  (make_plot(fit_ommatidia_area_4,"sex",sex_colours, "Ommatidia area") |
     make_plot(fit_ommatidia_area_4, "lifestyle", lifestyle_colours, "Ommatidia area")) /
  (make_plot(fit_forewing_hindwing_ratio_4, "sex", sex_colours, "Forewing to hindwing ratio") | make_plot(fit_forewing_hindwing_ratio_4, "lifestyle", lifestyle_colours, "Forewing to hindwing ratio")) + 
  plot_layout(guides = "collect") &       # cool sarguemnt!
  theme(legend.position = "top")

Figure_3

# Export
ggsave(
  "../Outputs/Figure_3.pdf",
  Figure_3,
  width = 8,
  height = 10,
  units = "in"
)

Session information

R version 4.3.2 (2023-10-31)

Platform: aarch64-apple-darwin20 (64-bit)

attached base packages: stats, graphics, grDevices, utils, datasets, methods and base

other attached packages: rstan(v.2.32.7), StanHeaders(v.2.32.10), ggnewscale(v.0.5.2), ggridges(v.0.5.6), patchwork(v.1.3.0), emmeans(v.1.11.1), DT(v.0.33), modelr(v.0.1.11), viridis(v.0.6.5), viridisLite(v.0.4.2), posterior(v.1.6.1), lubridate(v.1.9.4), forcats(v.1.0.0), readr(v.2.1.5), tidyverse(v.2.0.0), furrr(v.0.3.1), future(v.1.40.0), cmdstanr(v.0.9.0), purrr(v.1.0.4), loo(v.2.8.0), bayestestR(v.0.16.1), tidybayes(v.3.0.7), bayesplot(v.1.13.0), dplyr(v.1.1.4), brms(v.2.22.0), Rcpp(v.1.1.0), tidyr(v.1.3.1), stringr(v.1.5.1), details(v.0.4.0), ggthemes(v.5.1.0), tibble(v.3.2.1), ggtree(v.3.10.1), cowplot(v.1.1.3), ggpubr(v.0.6.0), ggplot2(v.3.5.2), kableExtra(v.1.4.0), readxl(v.1.4.5), phytools(v.2.4-4), maps(v.3.4.3), ape(v.5.8-1) and knitr(v.1.50)

loaded via a namespace (and not attached): svUnit(v.1.0.6), ggplotify(v.0.1.2), cellranger(v.1.1.0), datawizard(v.1.1.0), lifecycle(v.1.0.4), rstatix(v.0.7.2), doParallel(v.1.0.17), globals(v.0.17.0), processx(v.3.8.6), lattice(v.0.22-7), MASS(v.7.3-60), insight(v.1.3.1), crosstalk(v.1.2.1), ggdist(v.3.3.3), backports(v.1.5.0), magrittr(v.2.0.3), sass(v.0.4.10), rmarkdown(v.2.29), jquerylib(v.0.1.4), yaml(v.2.3.10), pkgbuild(v.1.4.8), RColorBrewer(v.1.1-3), abind(v.1.4-8), expm(v.1.0-0), quadprog(v.1.5-8), yulab.utils(v.0.2.0), tensorA(v.0.36.2.1), inline(v.0.3.21), listenv(v.0.9.1), tidytree(v.0.4.6), bridgesampling(v.1.1-2), parallelly(v.1.45.0), svglite(v.2.2.1), codetools(v.0.2-20), xml2(v.1.3.8), tidyselect(v.1.2.1), aplot(v.0.2.5), farver(v.2.1.2), matrixStats(v.1.5.0), stats4(v.4.3.2), jsonlite(v.2.0.0), Formula(v.1.2-5), iterators(v.1.0.14), systemfonts(v.1.2.3), foreach(v.1.5.2), tools(v.4.3.2), treeio(v.1.26.0), ragg(v.1.4.0), glue(v.1.8.0), mnormt(v.2.1.1), gridExtra(v.2.3), xfun(v.0.52), distributional(v.0.5.0), withr(v.3.0.2), numDeriv(v.2016.8-1.1), combinat(v.0.0-8), fastmap(v.1.2.0), callr(v.3.7.6), digest(v.0.6.37), timechange(v.0.3.0), R6(v.2.6.1), gridGraphics(v.0.5-1), estimability(v.1.5.1), textshaping(v.1.0.1), colorspace(v.2.1-1), generics(v.0.1.4), data.table(v.1.17.4), clusterGeneration(v.1.3.8), httr(v.1.4.7), htmlwidgets(v.1.6.4), scatterplot3d(v.0.3-44), pkgconfig(v.2.0.3), gtable(v.0.3.6), htmltools(v.0.5.8.1), carData(v.3.0-5), scales(v.1.4.0), png(v.0.1-8), ggfun(v.0.1.8), rstudioapi(v.0.17.1), tzdb(v.0.5.0), reshape2(v.1.4.4), coda(v.0.19-4.1), checkmate(v.2.3.2), nlme(v.3.1-168), curl(v.6.2.3), cachem(v.1.1.0), DEoptim(v.2.2-8), parallel(v.4.3.2), desc(v.1.4.3), pillar(v.1.10.2), grid(v.4.3.2), vctrs(v.0.6.5), car(v.3.1-3), arrayhelpers(v.1.1-0), xtable(v.1.8-4), evaluate(v.1.0.4), mvtnorm(v.1.3-3), cli(v.3.6.5), compiler(v.4.3.2), rlang(v.1.1.6), future.apply(v.1.11.3), rstantools(v.2.4.0), ggsignif(v.0.6.4), labeling(v.0.4.3), ps(v.1.9.1), plyr(v.1.8.9), fs(v.1.6.6), pander(v.0.6.6), stringi(v.1.8.7), QuickJSR(v.1.7.0), lazyeval(v.0.2.2), optimParallel(v.1.0-2), Brobdingnag(v.1.2-9), V8(v.6.0.4), Matrix(v.1.6-1.1), hms(v.1.1.3), clipr(v.0.8.0), igraph(v.2.1.4), broom(v.1.0.8), bslib(v.0.9.0), RcppParallel(v.5.1.10), phangorn(v.2.12.1) and fastmatch(v.1.1-6)